/* Take the estimated discontinuities from the experts and the estimators. Compute
RMSE. Compare graphically and through regression.
Humans were asked to round their estimates to the nearest .01. Do the same for
the estimators for fairness.
Also run a version of this where we trim the 10 worst guesses for each estimator
for each DGP.
To figure out whether RMSE is driven by bias or variance, plot the absolute deviations
for each method. For each DGP-discontinuity pair, compute the average guess for a
method. Compute the absolute value of the difference between that and the true
discontinuity. Then take the average over each DGP.
Note that we remove do not remove 0 discontinuities.
When an estimator fails to reject, change its guess to 0.
*/

clear
version 13.1
set more off
set scheme  s1mono 
		
cd "${main}"

cap log close
log using "${logs}/experts_vs_estimators_rmse2.log", replace

use "${dat}/expert_reported_discontinuities_cleaned.dta", clear
split gid, p("_")
gen file = gid1 + "_" + gid2 + "_" + gid7
merge m:1 file using "${dat}/rd_estimates_micro.dta"
keep if _merge == 3

* Compute rejection decisions (for non-RDHonest estimators)
gen D_reject = D_cl > 0 | D_cu < 0
gen rd_c_mse_reject = rd_c_cl_mse > 0 | rd_c_cu_mse < 0
gen rd_bc_mse_reject = rd_r_cl_mse > 0 | rd_r_cu_mse < 0
gen rd_c_ik_reject = rd_c_cl_ik > 0 | rd_c_cu_ik < 0

rename rdh_taylor_rot_estimate rdh_taylor_rot_est
rename rdh_taylor_rot_reject rdh_taylor_rot_est_rej


* Compute squared error for each estimator
foreach est in playerstated_disc_size D rd_c_mse rd_bc_mse rd_c_ik ///
	rdh_taylor_rot_est {
	
	* When don't reject null, guess 0
	if "`est'" != "playerstated_disc_size" {
		replace `est' = `est' * `est'_rej
	}
	
	replace `est' = round(`est', 0.01)
	gen `est'_t = `est' // Trimmed version
	
	* Trim
	foreach dgp in "ADGP1" "BDGP2" "CDGP3" "DDGP4" "EDGP5" "FDGP6" "GDGP7" "HDGP8" "IDGP9" "JDGP10" "KDGP11" {
		gen tosort1 = playerdgp == "`dgp'"
		gen tosort2 = abs(`est' - discont_raw)
		sort tosort1 tosort2
		replace `est'_t = . if _N - _n < 10
		drop tosort*
	} 
	
	sort playerdgp playerdisc
	* DGPxDiscontinuity average guess for the method
	by playerdgp playerdisc: egen d`est' = mean(`est')
	by playerdgp playerdisc: egen d`est'_t = mean(`est'_t)
	* Absolute deviation
	gen a`est' = abs(d`est' - discont_raw)
	gen a`est'_t = abs(d`est'_t - discont_raw)
	
	replace `est' = (`est' - discont_raw)^2
	replace `est'_t = (`est'_t - discont_raw)^2
	
}

* Compute averages
collapse (mean) playerstated_disc_size D rd_c_mse rd_bc_mse rd_c_ik ///
	rdh_taylor_rot_est ///
	playerstated_disc_size_t D_t rd_c_mse_t rd_bc_mse_t rd_c_ik_t ///
	rdh_taylor_rot_est_t ///
	aplayerstated_disc_size aD ard_c_mse ard_bc_mse ard_c_ik ///
	ardh_taylor_rot_est ///
	aplayerstated_disc_size_t aD_t ard_c_mse_t ard_bc_mse_t ard_c_ik_t ///
	ardh_taylor_rot_est_t, by(playerdgp)
	
* Take root to get RMSE
foreach est in playerstated_disc_size D rd_c_mse rd_bc_mse rd_c_ik ///
	rdh_taylor_rot_est {
	
	replace `est' = sqrt(`est')
	replace `est'_t = sqrt(`est'_t)
}

* Go wide to long
rename playerstated_disc_size rmse1
rename D rmse2
rename rd_c_mse rmse3
rename rd_bc_mse rmse4
rename rd_c_ik rmse5
rename rdh_taylor_rot_est rmse6
rename playerstated_disc_size_t rmse_t1
rename D_t rmse_t2
rename rd_c_mse_t rmse_t3
rename rd_bc_mse_t rmse_t4
rename rd_c_ik_t rmse_t5
rename rdh_taylor_rot_est_t rmse_t6
rename aplayerstated_disc_size ad1
rename aD ad2
rename ard_c_mse ad3
rename ard_bc_mse ad4
rename ard_c_ik ad5
rename ardh_taylor_rot_est ad6
rename aplayerstated_disc_size_t ad_t1
rename aD_t ad_t2
rename ard_c_mse_t ad_t3
rename ard_bc_mse_t ad_t4
rename ard_c_ik_t ad_t5
rename ardh_taylor_rot_est_t ad_t6

* Untrimmed data
tempfile full
save "`full'"

********
* RMSE *
********
use "`full'", clear
keep playerdgp rmse1 rmse2 rmse3 rmse4 rmse5 rmse6

reshape long rmse, i(playerdgp) j(method)

gen method_name = string(method)
replace method_name = "Expert" if method_name == "1"
replace method_name = "PQ" if method_name == "2"
replace method_name = "CCT Conv." if method_name == "3"
replace method_name = "CCT Rob." if method_name == "4"
replace method_name = "IK" if method_name == "5"
replace method_name = "AK" if method_name == "6"

*By player DGP:
*DGP specific graphs:
levelsof playerdgp, local(levels) 
local count = 1
foreach l of local levels{
preserve
keep if playerdgp == "`l'"
graph hbar rmse, over(method_name) ///
	ytitle("RMSE") ///
	title("DGP `count'") ///
	name(graph`count', replace)

local count = `count' + 1
restore
}
graph combine graph1 graph2 graph3 graph4 graph5 graph6 graph7 graph8 graph9 graph10 graph11
graph export "${output}/experts_vs_estimators_rmse2.pdf", as(pdf) replace

graph close _all

gen log_rmse = log(rmse)
label var log_rmse "Log(RMSE)"
tab playerdgp, gen(dgpdum)
tab method, gen(methoddum)
label var methoddum1 "Expert estimate"
label var methoddum2 "Piecewise quintic"
label var methoddum3 "RDRobust conventional"
label var methoddum4 "RDRobust bias-corrected"
label var methoddum5 "IK bandwidth conventional"
label var methoddum6 "RDHonest Taylor (ROT)"

drop methoddum2

clear

* Trimmed data
use "`full'", clear

keep playerdgp rmse_t1 rmse_t2 rmse_t3 rmse_t4 rmse_t5 rmse_t6

reshape long rmse_t, i(playerdgp) j(method)

gen method_name = string(method)
replace method_name = "Expert" if method_name == "1"
replace method_name = "PQ" if method_name == "2"
replace method_name = "CCT Conv." if method_name == "3"
replace method_name = "CCT Rob." if method_name == "4"
replace method_name = "IK" if method_name == "5"
replace method_name = "AK" if method_name == "6"

*By player DGP:
*DGP specific graphs:
levelsof playerdgp, local(levels) 
local count = 1
foreach l of local levels{
preserve
keep if playerdgp == "`l'"
graph hbar rmse_t, over(method_name) ///
	ytitle("RMSE") ///
	title("DGP `count'") ///
	name(graph`count', replace)

local count = `count' + 1
restore
}
graph combine graph1 graph2 graph3 graph4 graph5 graph6 graph7 graph8 graph9 graph10 graph11
graph export "${output}/experts_vs_estimators_rmse_trimmed2.pdf", as(pdf) replace

graph close _all

gen log_rmse_t = log(rmse_t)
label var log_rmse_t "Log(RMSE)"
tab playerdgp, gen(dgpdum)
tab method, gen(methoddum)
label var methoddum1 "Expert estimate"
label var methoddum2 "Piecewise quintic"
label var methoddum3 "RDRobust conventional"
label var methoddum4 "RDRobust bias-corrected"
label var methoddum5 "IK bandwidth conventional"
label var methoddum6 "RDHonest Taylor (ROT)"

cap log close
